********************************************************************************
** 	TITLE:		il2021_polls		                                          ** 	
**	AUTHOR:	    Philippe Mongrain                                             **
**	DATE:		October 2022 					                              **	
**	VERSION:	Stata 16					                                  **
********************************************************************************

* Run under Stata 16.0 semantics

version 16.0

* Read the "il2021" worksheet of the surveys workbook, taking variable names
* from its first row and replacing whatever data are in memory. Columns that
* arrive as text but hold only numbers are then converted to numeric.

import excel using surveys, sheet("il2021") firstrow clear
destring *, replace

* Survey date
* polldate is not generated here; it is expected to come from the workbook as
* a numeric Stata daily date (TODO confirm against the sheet). Only its
* display format is set.

format %tdMon_DD,_CCYY polldate

* Election date: 23 March 2021
* A date literal is used on purpose. Storing the number 20210323 in a variable
* of the default float type rounds it to 20210324 (floats cannot hold odd
* integers above 16,777,216), which dated the election one day late.

gen electiondate = td(23mar2021)

format %tdMon_DD,_CCYY electiondate

* Time of survey: number of days from the poll to election day
* (0 = poll dated on election day). The former "- 1" only offset the
* one-day-late election date, so it is removed together with that error;
* under the default float storage type the values of time are unchanged.

gen time = electiondate - polldate

* Express each party's value as a percentage of 120 seats

foreach party in likud yeshatid bluewhite joint shas utj beiteinu ///
    meretz raam yamina newhope labor zionist neweconomic {
    replace `party' = (`party'/120)*100
}

* For every party, store in <party>vote the mean of its value across all
* observations that share the same time value (missing values are ignored by
* egen's mean()).

foreach party in likud yeshatid bluewhite joint shas utj beiteinu ///
    meretz raam yamina newhope labor zionist neweconomic {
    bysort time : egen `party'vote = mean(`party')
}

* Leave the data ordered by time

sort time

* Keep a single observation per survey date
* duplicates drop retains the first observation of each polldate in the
* current data order and discards the rest (force is required because the
* other variables may differ across the discarded rows).
* The dup tag that used to be generated and immediately dropped here was
* never read, so it has been removed.

duplicates drop polldate, force

* Reshape the dataset to one row per survey date and party
*
* The daily means (<party>vote) are what gets reshaped. Previously the raw
* party columns of whichever row survived the de-duplication were used, so
* the daily means were computed but never entered the results, and days
* with several polls depended on an arbitrary row.
* NOTE(review): this changes the output for any date with more than one poll.

foreach party in likud yeshatid bluewhite joint shas utj beiteinu ///
    meretz raam yamina newhope labor zionist neweconomic {
    drop `party'
    rename `party'vote v_`party'
}

* party receives the text after the v_ prefix (e.g. "likud")

reshape long v_, i(polldate) j(party) string

rename v_ vote

keep polldate electiondate party vote poll time

order poll polldate electiondate time party vote

* Generate rank of parties within each survey date
*
* Rows are ordered inside each polldate by descending vote; parties with a
* missing vote come last and ties are broken alphabetically by party name so
* that the ordering is reproducible from run to run. The ordering is stated
* in the bysort itself rather than inherited from an earlier sort.

tempvar negvote
gen double `negvote' = -vote

bysort polldate (`negvote' party) : gen rank = _n

* Names of the parties ranked first, second and third on each date

by polldate : gen winner = party[1]
by polldate : gen runnerup = party[2]
by polldate : gen thirdplace = party[3]

* Generate poll margin: lead of the first-ranked over the second-ranked party

by polldate : gen pollmar = vote[1] - vote[2]

* Collapse back to one observation per survey date
* winner, runnerup, thirdplace and pollmar hold the same value on every row
* of a polldate, so keeping only the first row of each date loses none of
* them. The dup tag that used to be generated and immediately dropped here
* was never read, so it has been removed.

duplicates drop polldate, force

* Misleading poll
* misleading = 0 only when the poll ranks likud first with a margin of at
* least 1; every other poll gets 1.
* Stata treats a missing value as larger than any number, so "pollmar >= 1"
* alone would also be true when the margin is missing; the explicit
* !missing() guard stops such polls from being coded 0.
* NOTE(review): pollmar is in percentage points of 120 seats, so a margin of
* 1 corresponds to more than one seat - confirm this threshold is intended.

gen misleading = 0 if winner == "likud" & pollmar >= 1 & !missing(pollmar)
replace misleading = 1 if misleading != 0

* Save
* Retain only the four analysis variables, discard observations whose time
* is 0 or missing, and write the result to disk (overwriting any existing
* file of the same name).

keep polldate pollmar misleading time

keep if !(time == 0 | time == .)

save "il2021_polls.dta", replace